geno <- read_delim("tables/TableS1_genotypes.tsv")
pheno <- read_delim("tables/TableS2_phenotypes_metadata.tsv")
cipro_antibiogram <- full_join(geno,pheno)wt_colours <- c(`non-wt (I/R)`="IndianRed", `wt (S)`= "LightBlue")
res_colours <- c("I"="grey", "R"="IndianRed", "S"="LightBlue", "NWT"="grey")
res_colours2 <- c("I"="black", "R"="IndianRed", "S"="LightBlue", "NWT"="black")
res_colours3 <- c("NWT I"="#78638a", "NWT R"="IndianRed", "WT S"="LightBlue")linreg_details <- function(model) {
ci <- confint(model) %>% as_tibble(rownames="Determinant") %>%
rename(ci.lower=`2.5 %`, ci.upper=`97.5 %`)
model_summary <- summary(model)$coef %>%
as_tibble(rownames="Determinant") %>%
rename(est=Estimate, pval=`Pr(>|t|)`) %>%
select(Determinant, est, pval) %>% left_join(ci)
return(model_summary)
}cat_agree <- function(pred,truth) {
xtabs <- table(pred,factor(truth, levels = 0:1))
return((xtabs[1,1]+xtabs[2,2])/sum(xtabs))
}
ppv <- function(pred,truth) {
xtabs <- table(pred,truth)
return(xtabs[2,2]/sum(xtabs[2,]))
}
npv <- function(pred,truth) {
xtabs <- table(pred,truth)
return(xtabs[1,1]/sum(xtabs[1,]))
}
#sens <- function(pred,truth) {
# xtabs <- table(pred,truth)
# return(xtabs[2,2]/(sum(xtabs[,2])))
#}
sens <- function(pred,truth) {
xtabs <- table(pred,factor(truth, levels = 0:1))
return(xtabs[2,2]/(sum(xtabs[,2])))
}
spec <- function(pred,truth) {
xtabs <- table(pred,truth)
return(xtabs[1,1]/(sum(xtabs[,1])))
}
me <- function(pred,truth) {
xtabs <- table(pred,truth)
return(xtabs[2,1]/sum(xtabs[,1]))
}
vme <- function(pred,truth) {
xtabs <- table(pred,factor(truth, levels = 0:1))
return(xtabs[1,2]/sum(xtabs[,2]))
}
metrics <- function(pred,truth) {
xtabs <- table(pred,truth)
return(list(cat=cat_agree(pred,truth),
ppv=ppv(pred,truth),
npv=npv(pred,truth),
sens=sens(pred,truth),
spec=spec(pred,truth),
me=me(pred,truth),
vme=vme(pred,truth),
n=sum(xtabs),
summary=c("cat"=cat_agree(pred,truth),
"sens"=sens(pred,truth),
"spec"=spec(pred,truth),
"me"=me(pred,truth),
"vme"=vme(pred,truth),
"n"=sum(xtabs)
)
)
)
}# prepare matrix for modelling
R_vs_dets <- cipro_antibiogram %>%
select(`GyrA-83F`:`qnrVC6`, aac6, resistant) %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches
# exclude rare variants
R_vs_dets <- R_vs_dets[,colSums(R_vs_dets) >= 20]
# model R vs all common variants (N>=20) = Model 1a
model_R_indiv <- logistf(resistant ~ ., data=R_vs_dets)
# model R vs all common variants (N>=20), each with aac6 interaction = Model 1b
model_R_indiv_aac6Interaction <- logistf(resistant ~ .*`aac(6')-Ib-cr`, data=R_vs_dets)# summarise model output
logreg_model1a <- logreg_details(model_R_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
mutate(Determinant=gsub("`","",Determinant))
logreg_model1b <- logreg_details(model_R_indiv_aac6Interaction) %>%
rename(Term=Determinant) %>%
separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>%
mutate(model="b") %>%
mutate(Determinant=gsub("`","",Determinant)) %>%
mutate(Term=gsub("`","",Term)) %>%
mutate(interaction=gsub("`","",interaction))
# all individual determinants, plus significant positive interactions
log_reg_R_plot <- logreg_model1a %>% bind_rows(logreg_model1b) %>%
filter((pval<0.05 & est>0) | is.na(interaction)) %>%
mutate(interaction=factor(replace(interaction, is.na(interaction), "none"), levels=c("none", "aac(6')-Ib-cr"))) %>%
filter(Determinant != "(Intercept)") %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
geom_point(aes(x=est, group=model, col=model), position=pd) +
scale_colour_manual(values=c(a="#f1933b", b="#bd1515")) +
theme_bw() + labs(x="Regression coefficient", col="R Model", linetype="Interaction")
log_reg_R_plot# prepare matrix for modelling
NWT_vs_dets <- cipro_antibiogram %>%
select(`GyrA-83F`:`qnrVC6`, aac6, nonWT_binary) %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches
# exclude genomes with rare variants
NWT_vs_dets <- NWT_vs_dets[,colSums(NWT_vs_dets) >= 20]
# model NWT vs all common variants (N>=20) = Model 3a
model_NWT_indiv <- logistf(nonWT_binary ~ ., data=NWT_vs_dets)
# model NWT vs all common variants (N>=20), each with aac6 interaction = Model 3b
model_NWT_indiv_aac6Interaction <- logistf(nonWT_binary ~ .*`aac(6')-Ib-cr`, data=NWT_vs_dets)# summarise model output
logreg_model3a <- logreg_details(model_NWT_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
mutate(Determinant=gsub("`","",Determinant))
logreg_model3b <- logreg_details(model_NWT_indiv_aac6Interaction) %>%
rename(Term=Determinant) %>%
separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>%
mutate(model="b") %>%
mutate(Determinant=gsub("`","",Determinant)) %>%
mutate(Term=gsub("`","",Term)) %>%
mutate(interaction=gsub("`","",interaction))
# get count per marker to label on the right
marker_count <- cipro_antibiogram %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(`GyrA-83F`:`qnrVC6`, `aac(6')-Ib-cr`) %>%
colSums()
marker_count_order <- logreg_model3a %>% bind_rows(logreg_model3b) %>%
filter((pval<0.05 & est>0) | is.na(interaction)) %>%
filter(Determinant != "(Intercept)") %>% pull(Determinant) %>% unique() %>% sort()
log_reg_NWT_plot <- logreg_model3a %>% bind_rows(logreg_model3b) %>%
filter((pval<0.05 & est>0) | is.na(interaction)) %>%
mutate(interaction=factor(replace(interaction, is.na(interaction), "none"), levels=c("none", "aac(6')-Ib-cr"))) %>%
filter(Determinant != "(Intercept)") %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
geom_point(aes(x=est, group=model, col=model), position=pd) +
scale_colour_manual(values=c(a="#70bfef", b="#2c15ae")) +
theme_bw() + labs(x="Regression coefficient", col="NWT Model", linetype="Interaction", y="") +
scale_y_discrete(labels=paste0("n=",marker_count[marker_count_order]), position="right")
log_reg_NWT_plotmultiVariable_vs_singleVariable <- logreg_model1a %>% select(-c(interaction, model)) %>%
setNames(paste0('R Model1a.', names(.))) %>% rename(Term='R Model1a.Determinant') %>%
full_join(logreg_model1b %>% select(-c(Determinant, interaction, model)) %>% setNames(paste0('R Model1b.', names(.)))%>% rename(Term='R Model1b.Term'), by="Term") %>%
full_join(logreg_model3a %>% select(-c(interaction, model)) %>% setNames(paste0('NWT Model1a.', names(.))) %>% rename(Term='NWT Model1a.Determinant'), by="Term") %>%
full_join(logreg_model3b %>% select(-c(Determinant, interaction, model)) %>% setNames(paste0('NWT Model1b.', names(.)))%>% rename(Term='NWT Model1b.Term'), by="Term")
write_csv(multiVariable_vs_singleVariable, file="tables/TableS6_LogRegCoef.csv")ppv_solo <- read_csv("tables/TableS5_determinantStats.csv") %>% filter(!is.na(N.solo)) %>% select(Determinant, ends_with(".solo"))
ppvR <- ppv_solo %>% mutate(p=R.solo/N.solo, se=sqrt(p*(1-p)/N.solo), ci.lower=p-1.96*se, ci.upper=p+1.96*se) %>% select(Determinant, p, ci.lower, ci.upper) %>% mutate(category="R")
ppvNWT <- ppv_solo %>% mutate(p=NWT.solo/N.solo, se=sqrt(p*(1-p)/N.solo), ci.lower=p-1.96*se, ci.upper=p+1.96*se) %>% select(Determinant, p, ci.lower, ci.upper, N.solo) %>% mutate(category="NWT")
ppvR_plot <- ppvR %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=50, linetype=2) +
geom_linerange(aes(xmin=ci.lower*100, xmax=ci.upper*100), col="#bd1515", position=pd) +
geom_point(aes(x=p*100, group=category), col="#bd1515", position=pd) +
theme_bw() + labs(x="PPV (%)")
ppvNWT_plot <- ppvNWT %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=50, linetype=2) +
geom_linerange(aes(xmin=ci.lower*100, xmax=ci.upper*100), col="#2c15ae", position=pd) +
geom_point(aes(x=p*100, group=category), col="#2c15ae", position=pd) +
theme_bw() + labs(x="PPV (%)", y="") +
scale_y_discrete(labels=paste0("n=",ppvNWT$N.solo), position="right")(ppvR_plot + ggtitle(label="a) PPV for solo marker", subtitle="R") + ppvNWT_plot + ggtitle(label="", subtitle="NWT") + plot_layout(guides="collect", axes="collect")) / (log_reg_R_plot + ggtitle(label="b) Logistic regression", subtitle="R") + log_reg_NWT_plot + ggtitle(label="", subtitle="NWT") + plot_layout(guides="collect", axes="collect"))# R models
model_R_indiv_pred <- predict(model_R_indiv, type="response")
model_R_indiv_aac6Interaction_pred <- predict(model_R_indiv_aac6Interaction, type="response")
# NWT models
model_NWT_indiv_pred <- predict(model_NWT_indiv, type="response")
model_NWT_indiv_aac6Interaction_pred <- predict(model_NWT_indiv_aac6Interaction, type="response")# R models
model_R_groups <- logistf(resistant ~ QRDR_mutations + acquired_genes + aac6, data=cipro_antibiogram)
model_R_groups_selectInteract <- logistf(resistant ~ QRDR_mutations*aac6 + acquired_genes*aac6, data=cipro_antibiogram)
# NWT models
model_NWT_groups <- logistf(nonWT_binary ~ QRDR_mutations + acquired_genes + aac6, data=cipro_antibiogram)
model_NWT_groups_selectInteract <- logistf(nonWT_binary ~ QRDR_mutations*aac6 + acquired_genes*aac6, data=cipro_antibiogram)
# R model predictions
model_R_groups_pred <- predict(model_R_groups, type="response")
model_R_groups_selectInteract_pred <- predict(model_R_groups_selectInteract, type="response")
# NWT model predictions
model_NWT_groups_pred <- predict(model_NWT_groups, type="response")
model_NWT_groups_selectInteract_pred <- predict(model_NWT_groups_selectInteract, type="response")count_model_summary <- logreg_details(model_R_groups) %>% mutate(Model="2a", response="R") %>%
bind_rows(logreg_details(model_R_groups_selectInteract) %>% mutate(Model="2b", response="R")) %>%
bind_rows(logreg_details(model_NWT_groups) %>% mutate(Model="2a", response="NWT")) %>%
bind_rows(logreg_details(model_NWT_groups_selectInteract) %>% mutate(Model="2b", response="NWT")) %>%
relocate(Model, .before=Determinant) %>% relocate(response, .before=Model) %>%
write_csv(file="tables/TableS7_CountLogRegCoef.csv")aic <- c(extractAIC(model_R_indiv)[2],
extractAIC(model_R_indiv_aac6Interaction)[2],
extractAIC(model_R_groups)[2],
extractAIC(model_R_groups_selectInteract)[2],
extractAIC(model_NWT_indiv)[2],
extractAIC(model_NWT_indiv_aac6Interaction)[2],
extractAIC(model_NWT_groups)[2],
extractAIC(model_NWT_groups_selectInteract)[2])
anova_result <- c("-",anova(model_R_indiv, model_R_indiv_aac6Interaction)$pval,
"-",anova(model_R_groups, model_R_groups_selectInteract)$pval,
"-",anova(model_NWT_indiv, model_NWT_indiv_aac6Interaction)$pval,
"-", anova(model_NWT_groups, model_NWT_groups_selectInteract)$pval)
metrics_result <- rbind(
metrics(model_R_indiv_pred>0.5, R_vs_dets$resistant)$summary,
metrics(model_R_indiv_aac6Interaction_pred>0.5, R_vs_dets$resistant)$summary,
metrics(model_R_groups_pred>0.5, cipro_antibiogram$resistant)$summary,
metrics(model_R_groups_selectInteract_pred>0.5, R_vs_dets$resistant)$summary,
metrics(model_NWT_indiv_pred>0.5, NWT_vs_dets$nonWT_binary)$summary,
metrics(model_NWT_indiv_aac6Interaction_pred>0.5, NWT_vs_dets$nonWT_binary)$summary,
metrics(model_NWT_groups_pred>0.5, cipro_antibiogram$nonWT_binary)$summary,
metrics(model_NWT_groups_selectInteract_pred>0.5, cipro_antibiogram$nonWT_binary)$summary
)
model_summary_tab <- cbind(aic, anova_result, metrics_result) %>% as_tibble() %>%
rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat) %>%
rename(`model fit (aac6 interaction)`=anova_result) %>%
rename(AIC=aic) %>%
mutate(ModelName=c("Model 1a", "Model 1b", "Model 2a", "Model 2b", "Model 1a", "Model 1b", "Model 2a", "Model 2b")) %>%
mutate(response = c(rep("R",4), rep("NWT",4))) %>%
mutate(predictors = rep(c("determinants", "determinant*aac6", "QRDR count + PMQR count + aac6","QRDR count*aac6 + PMQR count*aac6"),2))model_summary_tab %>%
mutate(Sensitivity=paste0(round(as.numeric(Sensitivity)*100,2),"%")) %>%
mutate(Specificity=paste0(round(as.numeric(Specificity)*100,2),"%")) %>%
mutate(ME=paste0(round(as.numeric(ME)*100,2),"%")) %>%
mutate(VME=paste0(round(as.numeric(VME)*100,2),"%")) %>%
mutate(`Categorical agreement`=paste0(round(as.numeric(`Categorical agreement`)*100,2),"%")) %>%
select(response, ModelName, predictors, `Categorical agreement`, Sensitivity, Specificity, ME, VME, AIC, `model fit (aac6 interaction)`) %>%
write_csv("tables/Table1a_ModelSummary.csv", na="-")cipro_antibiogram <- cipro_antibiogram %>%
mutate(QRDR_mutations_exclude = str_count(Flq_mutations, "GyrA-87G") + str_count(Flq_mutations, "GyrA-87H")) %>%
mutate(PMQR_exclude = str_count(Flq_acquired, "qnrB1.v1") + str_count(Flq_acquired, "qnrB1.v2")) %>%
mutate(rules_category = case_when(
QRDR_mutations==0 & acquired_genes==0 & aac6==0 ~ 0, # no determinants -> WT S
QRDR_mutations==1 & acquired_genes==0 & aac6==0 & QRDR_mutations_exclude==1 ~ 0, # non-associated QRDR solo -> WT S
QRDR_mutations==0 & acquired_genes==0 & aac6==1 ~ 1, # aac6 solo -> WT S
QRDR_mutations==0 & acquired_genes==1 & aac6==0 & PMQR_exclude==1 ~ 2, # non-associated PMQR solo -> NWT I
QRDR_mutations==1 & acquired_genes==0 & aac6==0 & QRDR_mutations_exclude==0 ~ 3, # 1 QRDR, except those not associated -> NWT R
QRDR_mutations==1 & acquired_genes==0 & aac6==1 ~ 4, # 1 QRDR + aac6 -> NWT R
QRDR_mutations>=2 & acquired_genes==0 ~ 5, # ≥2 QRDR -> NWT R
QRDR_mutations==0 & acquired_genes==1 & aac6==0 & PMQR_exclude==0 ~ 6, # 1 PMQR, except those not associated -> NWT R
QRDR_mutations==0 & acquired_genes==1 & aac6==1 ~ 7, # 1 PMQR + aac6
QRDR_mutations==0 & acquired_genes>=2 ~ 8, # ≥2 PMQR -> NWT R
QRDR_mutations>=1 & acquired_genes>=1 ~ 9, # QRDR + PMQR -> NWT R
TRUE ~ NA
))
# check all are assigned
table(cipro_antibiogram$rules_category)##
## 0 1 2 3 4 5 6 7 8 9
## 5680 161 160 103 23 2167 546 819 68 2440
##
## FALSE
## 12167
# assign S/R predictions to categories
cipro_antibiogram <- cipro_antibiogram %>%
mutate(predSIR = if_else(rules_category<=2, "S", "R")) %>%
mutate(predSIR = replace(predSIR, rules_category==2, "I")) %>%
mutate(predSIR = factor(predSIR, levels=c("S", "I", "R"))) %>%
mutate(predR = if_else(rules_category<=2, "S", "R")) %>%
mutate(predR = factor(predR, levels=c("S", "R"))) %>%
mutate(predNWT = if_else(rules_category<2, "WT", "NWT")) %>%
mutate(predNWT = factor(predNWT, levels=c("WT", "NWT")))
table(cipro_antibiogram$rules_category, cipro_antibiogram$predR)##
## S R
## 0 5680 0
## 1 161 0
## 2 160 0
## 3 0 103
## 4 0 23
## 5 0 2167
## 6 0 546
## 7 0 819
## 8 0 68
## 9 0 2440
##
## S I R
## 0 5680 0 0
## 1 161 0 0
## 2 0 160 0
## 3 0 0 103
## 4 0 0 23
## 5 0 0 2167
## 6 0 0 546
## 7 0 0 819
## 8 0 0 68
## 9 0 0 2440
##
## WT NWT
## 0 5680 0
## 1 161 0
## 2 0 160
## 3 0 103
## 4 0 23
## 5 0 2167
## 6 0 546
## 7 0 819
## 8 0 68
## 9 0 2440
species_R <- cipro_antibiogram %>% group_by(species) %>%
filter(species != "Klebsiella variicola subsp. tropica") %>%
filter(species != "Klebsiella quasivariicola") %>%
filter(species != "Klebsiella africana") %>%
summarise(cat=cat_agree(predR, resistant),
sens=sens(predR, resistant),
spec=spec(predR, resistant),
me=me(predR, resistant),
vme=vme(predR, resistant),
n=n())%>%
mutate(out=rep("R", 4)) %>%
relocate(out, .before=species)
species_NWT <- cipro_antibiogram %>% group_by(species) %>%
filter(species != "Klebsiella variicola subsp. tropica") %>%
filter(species != "Klebsiella quasivariicola") %>%
filter(species != "Klebsiella africana") %>%
summarise(cat=cat_agree(predNWT, nonWT_binary),
sens=sens(predNWT, nonWT_binary),
spec=spec(predNWT, nonWT_binary),
me=me(predNWT, nonWT_binary),
vme=vme(predNWT, nonWT_binary),
n=n()) %>%
mutate(out=rep("NWT", 4)) %>%
relocate(out, .before=species)
bind_rows(c("out"="R","species"="Species complex",
metrics(cipro_antibiogram$predR, cipro_antibiogram$resistant)$summary),
c("out"="NWT","species"="Species complex",
metrics(cipro_antibiogram$predNWT, cipro_antibiogram$nonWT_binary)$summary)
) %>%
mutate_at(c("cat","sens","spec","me","vme", "n"), as.double) %>%
bind_rows(species_R) %>%
bind_rows(species_NWT) %>%
rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat, N=n) %>%
mutate(Sensitivity=paste0(round(as.numeric(Sensitivity)*100,2),"%")) %>%
mutate(Specificity=paste0(round(as.numeric(Specificity)*100,2),"%")) %>%
mutate(ME=paste0(round(as.numeric(ME)*100,2),"%")) %>%
mutate(VME=paste0(round(as.numeric(VME)*100,2),"%")) %>%
mutate(`Categorical agreement`=paste0(round(as.numeric(`Categorical agreement`)*100,2),"%")) %>%
select(out, species, `Categorical agreement`, Sensitivity, Specificity, ME, VME, N) %>%
write_csv("tables/Table1b_ModelSummary.csv", na="-")ST_R <- cipro_antibiogram %>% group_by(ST) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="ST") %>%
relocate(out, category, .before=ST)%>%
rename(category_name=ST)Country_R <- cipro_antibiogram %>% group_by(Country) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Country") %>%
relocate(out, category, .before=Country)%>%
rename(category_name=Country)SourceType_R <- cipro_antibiogram %>% group_by(Source.Type) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Source") %>%
relocate(out, category, .before=Source.Type)%>%
rename(category_name=Source.Type)cipro_antibiogram$Infection.status <- cipro_antibiogram$Infection.status %>% replace_na("Unknown")
InfectionStatus_R <- cipro_antibiogram %>% group_by(Infection.status) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Inf") %>%
relocate(out, category, .before=Infection.status) %>%
rename(category_name=Infection.status)cipro_antibiogram <- cipro_antibiogram %>%
mutate(platform_type=case_when(
Laboratory.Typing.Platform=="Sensititre" ~ "Auto",
Laboratory.Typing.Platform=="Phoenix" ~ "Auto",
Laboratory.Typing.Platform=="Vitek" ~ "Auto",
Laboratory.Typing.Platform=="Manual" ~ "Manual",
TRUE ~ "Unknown"
))
platform_R <- cipro_antibiogram %>%
group_by(platform_type) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
n_dataset=n_distinct(index),
susceptible=n-sum(resistant),
resistant=sum(resistant),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Plat") %>%
relocate(out, category, .before=platform_type)%>%
rename(category_name=platform_type)
platform_RCombined_R <- bind_rows(ST_R, Country_R, SourceType_R, InfectionStatus_R, platform_R)
Combined_R <- Combined_R %>%
rename(Sensitivity=sens, Specificity=spec, ME=me, VME=vme, `Categorical agreement`=cat, N=n, `Resistant`=percent_R, Category=category, Prediction=out, `Category Name`=category_name) %>%
write_csv("tables/TableSX_ModelSummary_Combined.csv", na="-")
#filter for categories: susceptible genomes >10, more than 25 genomes, more than 2 datasets
Combined_R_100 <- Combined_R %>%
filter(susceptible>10) %>%
filter(N>25) %>%
filter(n_dataset >=3) %>%
mutate(Category_label=paste(`Category Name`, " (n=", N,", S=", susceptible, ")", sep = "")) %>%
select(Category, Category_label, N, Sensitivity, Specificity, `Resistant`)
# convert table from wide to long for plotting
Combined_R_100_long<-gather(Combined_R_100, `Evaluation metric`, value, Sensitivity:`Resistant`)
Combined_R_100_long <- Combined_R_100_long[order(Combined_R_100_long$N, decreasing = TRUE),]
Combined_R_100_plot <-ggplot(Combined_R_100_long, aes(as.numeric(value),fct_inorder(as.factor(Category_label)))) +
geom_point(aes(shape = `Evaluation metric`, size=`Evaluation metric`, colour=`Evaluation metric`), alpha=0.5) +
scale_shape_manual(values = c(Sensitivity=15, Specificity=18, Resistant=3)) +
scale_size_manual(values = c(Sensitivity=3, Specificity=3, Resistant=1)) +
scale_colour_manual(values=c(Sensitivity="#8eb567", Specificity="#9a98ea", Resistant="IndianRed")) +
scale_y_discrete(limits=rev)+
facet_grid(Category~., scales = "free", space='free')+
labs(y="", x = "%") +
theme_bw()+
theme(legend.title=element_blank())
Combined_R_100_plotStudy_R <- cipro_antibiogram %>% group_by(index) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="index") %>%
relocate(out, category, .before=index)%>%
rename(category_name=index)platform_R <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method!="Disk diffusion") %>%
group_by(Laboratory.Typing.Platform) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Laboratory.Typing.Platform") %>%
relocate(out, category, .before=Laboratory.Typing.Platform)%>%
rename(category_name=Laboratory.Typing.Platform)
platform_Rcipro_antibiogram %>%
filter(Laboratory.Typing.Method!="Disk diffusion") %>%
group_by(Laboratory.Typing.Platform) %>% summarise(studies=length(unique(index)))method_R <- cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
summarise(sens=sens(predR, resistant)*100,
spec=spec(predR, resistant)*100,
cat=cat_agree(predR, resistant)*100,
me=me(predR, resistant)*100,
vme=vme(predR, resistant)*100,
n=n(),
percent_R=(sum(resistant))/n*100) %>%
mutate(out="R") %>%
mutate(category="Laboratory.Typing.Method") %>%
relocate(out, category, .before=Laboratory.Typing.Method)%>%
rename(category_name=Laboratory.Typing.Method)cipro_antibiogram <- cipro_antibiogram %>%
mutate(correctR=case_when(predR=="R" & resistant==1 ~ 1, predR=="S" & resistant==0 ~ 1, TRUE ~ 0)) %>%
mutate(method=if_else(Laboratory.Typing.Method=="Disk diffusion", "DD", "MIC"))
cipro_antibiogram %>% group_by(correctR, predR, resistant) %>% count()study_method <- logistf(correctR ~ method + index + Source.Type + Infection.status, data=cipro_antibiogram)
summary(study_method)## logistf(formula = correctR ~ method + index + Source.Type + Infection.status,
## data = cipro_antibiogram)
##
## Model fitted by Penalized ML
## Coefficients:
## coef se(coef) lower 0.95 upper 0.95
## (Intercept) 5.8570120 0.8564655 4.2184530 7.658961177
## methodMIC -0.0899076 0.4747928 -1.0180428 0.878769944
## indexBARNARDS -1.4160665 0.5737283 -2.7364322 -0.419611810
## indexBSAC -0.6989136 0.5945535 -2.0472841 0.354006636
## indexColombia_GHRU 0.1813858 0.6119892 -1.1899732 1.283355039
## indexDenmark_MedVetKlebs -2.2117851 1.5600901 -5.1541267 2.848904929
## indexEgli -1.1519461 0.6538546 -2.5879685 0.041925492
## indexEllington 1.7138286 1.5195999 -0.6446311 6.621759032
## indexHeinz_2019 -1.4100155 0.5849177 -2.7456413 -0.383978292
## indexHuynh_2020 -0.6671717 0.8110845 -2.3647398 0.897857811
## indexIndia_GHRU -0.1874074 0.5690257 -1.5012259 0.797115455
## indexItaly_MedVetKlebs -1.7376382 1.0376602 -3.7402136 0.737418522
## indexKASPAH -0.4145582 0.5798172 -1.7433452 0.598982126
## indexKLEBGAP 0.4362343 0.7217077 -1.1128863 1.811414866
## indexLeangapichart_2021 -3.1431028 0.6964239 -4.6461842 -1.848722010
## indexLong 0.6524075 0.5722424 -0.6657149 1.645699290
## indexMathers -0.3750241 0.8500158 -2.0521966 1.450809422
## indexMilan_2019 0.2437530 0.8087871 -1.3985282 1.887236282
## indexNigeria_GHRU -0.4019747 0.6792921 -1.8625070 0.890156733
## indexPerdigao_2022 -1.3995000 0.7407401 -2.9854852 0.005257405
## indexPhilippines -0.9322924 0.5873938 -2.2711442 0.100891220
## indexPotter_2018 0.2861738 1.0997035 -1.7833069 2.796837808
## indexSands_2021 -3.6366440 0.9330159 -5.5596073 -1.803232729
## indexSPARK -0.2411907 0.5719818 -1.5595632 0.751221034
## indexStoesser_2013 -0.4820626 0.7316590 -2.0107663 0.964989633
## indexTorok_2020 1.0511654 0.6248395 -0.3393404 2.186982133
## indexWright 0.2867726 0.8429557 -1.3770025 2.101823402
## Source.TypeEnvironment 0.1880697 0.7157825 -1.1420680 1.874948165
## Source.TypeFeed -0.3903375 1.6841289 -3.4914472 4.649184688
## Source.TypeFood 1.3220676 1.4750241 -1.6730186 6.326619101
## Source.TypeHuman -2.0098044 0.4242724 -2.9213898 -1.219666222
## Source.TypeMarine -0.4111707 1.4769433 -2.5741050 4.467733343
## Infection.statusInfection -0.5972647 0.2096206 -1.0231962 -0.192208138
## Infection.statusUnknown -0.6946898 0.2912958 -1.2837835 -0.131104697
## Chisq p method
## (Intercept) 58.81523742 1.731948e-14 2
## methodMIC 0.03328552 8.552349e-01 2
## indexBARNARDS 8.52073728 3.511222e-03 2
## indexBSAC 1.58913641 2.074496e-01 2
## indexColombia_GHRU 0.08496624 7.706768e-01 2
## indexDenmark_MedVetKlebs 1.14944935 2.836645e-01 2
## indexEgli 3.56170038 5.912714e-02 2
## indexEllington 1.84595483 1.742546e-01 2
## indexHeinz_2019 7.80285160 5.216385e-03 2
## indexHuynh_2020 0.67782951 4.103351e-01 2
## indexIndia_GHRU 0.11382632 7.358293e-01 2
## indexItaly_MedVetKlebs 2.11237987 1.461119e-01 2
## indexKASPAH 0.56282928 4.531225e-01 2
## indexKLEBGAP 0.34177884 5.588042e-01 2
## indexLeangapichart_2021 23.86842573 1.031498e-06 2
## indexLong 1.10463487 2.932512e-01 2
## indexMathers 0.19016311 6.627809e-01 2
## indexMilan_2019 0.09062892 7.633791e-01 2
## indexNigeria_GHRU 0.36125861 5.478081e-01 2
## indexPerdigao_2022 3.81150070 5.090183e-02 2
## indexPhilippines 3.06768967 7.986290e-02 2
## indexPotter_2018 0.06830039 7.938277e-01 2
## indexSands_2021 14.95387013 1.101719e-04 2
## indexSPARK 0.18874060 6.639669e-01 2
## indexStoesser_2013 0.43843216 5.078802e-01 2
## indexTorok_2020 2.34112846 1.259982e-01 2
## indexWright 0.11711311 7.321869e-01 2
## Source.TypeEnvironment 0.06677484 7.960918e-01 2
## Source.TypeFeed 0.04954485 8.238572e-01 2
## Source.TypeFood 0.73714824 3.905759e-01 2
## Source.TypeHuman 30.25397897 3.790129e-08 2
## Source.TypeMarine 0.06909543 7.926584e-01 2
## Infection.statusInfection 8.50808728 3.535715e-03 2
## Infection.statusUnknown 5.88538359 1.526707e-02 2
##
## Method: 1-Wald, 2-Profile penalized log-likelihood, 3-None
##
## Likelihood ratio test=397.9632 on 33 df, p=0, n=12167
## Wald test = 3883.461 on 33 df, p = 0
category_phenotype_table <- cipro_antibiogram %>%
group_by(rules_category, resistant) %>%
count() %>%
ungroup() %>%
pivot_wider(names_from=resistant, values_from=n, values_fill=0) %>%
rename(SI=`0`, R=`1`)
category_phenotype_table <- cipro_antibiogram %>%
group_by(rules_category, WT_vs_nonWT) %>%
count() %>%
ungroup() %>%
pivot_wider(names_from=WT_vs_nonWT, values_from=n, values_fill=0) %>%
rename(NWT=`non-wt (I/R)`, WT=`wt (S)`) %>%
full_join(category_phenotype_table)
category_phenotype_table <- category_phenotype_table %>%
mutate(N=NWT+WT) %>%
mutate(`R PPV`=paste0(round(R/N*100,2), "% (N=", R,"/",N,")")) %>%
mutate(`NWT PPV`=paste0(round(NWT/N*100,2), "% (N=", NWT,"/",N,")"))
category_phenotype_tablepredR_summary <- cipro_antibiogram %>%
group_by(predR, resistant) %>%
count() %>%
ungroup() %>%
pivot_wider(names_from=resistant, values_from=n, values_fill=0) %>%
rename(SI=`0`, R=`1`) %>%
mutate(N=SI+R) %>%
mutate(PPV = R/N)
predNWT_summary <- cipro_antibiogram %>%
group_by(predNWT, WT_vs_nonWT) %>%
count() %>%
ungroup() %>%
pivot_wider(names_from=WT_vs_nonWT, values_from=n, values_fill=0) %>%
mutate(N=`non-wt (I/R)`+`wt (S)`) %>%
mutate(PPV = `non-wt (I/R)`/N)
category_phenotype_table %>%
mutate(QRDR=c("0^", "0", "0", "1", "1", ">1", "0", "0", "0", ">0")) %>%
mutate(PMQR=c("0", "0", "qnrB1", "0", "0", "0", "1^", "1", ">1", ">0")) %>%
mutate(aac6=c("0", "1", "0", "0", "1", "*", "0", "1", "*", "*")) %>%
mutate(Prediction=c("WT S", "WT S", "NWT I", rep("NWT R", 7))) %>%
mutate(N=as.character(N)) %>%
select(QRDR, PMQR, aac6, Prediction, N, `R PPV`, `NWT PPV`) %>%
bind_rows(c(QRDR="Overall", PMQR="", aac6="", Prediction="-", N=as.character(nrow(cipro_antibiogram)),
`R PPV`=paste0(round(predR_summary[2, "PPV"]*100,2),"% (N=", predR_summary[2,"R"], "/", predR_summary[2,"N"],")"),
`NWT PPV`=paste0(round(predNWT_summary[2, "PPV"]*100,2),"% (N=", predNWT_summary[2,"non-wt (I/R)"], "/", predNWT_summary[2,"N"],")")
)) %>%
write_csv(file="tables/Table2_Classifier.csv")
write(x="\n^=GyrA-87G and GyrA-87H are not included in QRDR count; qnrB1 is excluded from the single-PMQR count. *aac6 presence is not considered", file="tables/Table2_Classifier.csv", append=T)# MIC distribution R vs. S
rules_MIC_distributionRS <- cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
mutate(predSIR_label=case_when(predSIR=="S" ~ "WT S", predSIR=="R" ~ "NWT R", TRUE ~ "NWT I")) %>%
mutate(predSIR_label=factor(predSIR_label, levels=c("WT S", "NWT I", "NWT R"))) %>%
ggplot(aes(as.factor(round(as.numeric(Measurement), digits=2)), fill = predSIR_label)) + geom_bar() +
labs(title = "A) MIC, by classifer prediction", y = "Number of genomes", x="MIC (mg/L)",
linetype = "MIC breakpoints", fill="Classifier prediction") +
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
scale_fill_manual(values = res_colours3)+
geom_vline(aes(xintercept = 5.5), color = "black") +
annotate("text", x=5.75, y=2900, label="R", size=2.5)+
geom_vline(aes(xintercept = 4.5), color = "black") +
annotate("text", x=4.25, y=2900, label="S", size=2.5)+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=11))
rules_MIC_distributionRS# DD distribution
rules_DD_distributionRS <- cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
mutate(predSIR_label=case_when(predSIR=="S" ~ "WT S", predSIR=="R" ~ "NWT R", TRUE ~ "NWT I")) %>%
mutate(predSIR_label=factor(predSIR_label, levels=c("WT S", "NWT I", "NWT R"))) %>%
ggplot(aes(as.numeric(Measurement), fill = predSIR_label)) + geom_bar() +
labs(title = "B) Disk zones, by classifer prediction", y = "Number of genomes", x ="Disk zone (mm)",
linetype = "Disk diffusion \nbreakpoints") +
scale_fill_manual(values = res_colours3)+
geom_vline(aes(xintercept = 21.5), color = "black") +
annotate("text", x=20.5, y=400, label="R", size=2.5)+
geom_vline(aes(xintercept = 24.5), color = "black") +
#geom_vline(aes(xintercept = 25.5), color = "black", linetype=2) +
annotate("text", x=25.5, y=400, label="S", size=2.5)+
theme_minimal() +
theme(axis.text.x = element_text(size=11))+
theme(legend.position = "none")
rules_DD_distributionRSprofile_labels <- paste(
paste0(c("0^", "0", "0", "1", "1", ">1", "0", "0", "0", ">0"), rep(" QRDR,", 10)),
c("0 PMQR,", "0 PMQR,", "qnrB1,", "0 PMQR,", "0 PMQR,", "0 PMQR", "1 PMQR^,", "1 PMQR,", ">1 PMQR", ">0 PMQR"),
c("aac6-", "aac6+", "aac6-", "aac6-", "aac6+", "", "aac6-", "aac6+", "", "")
)
names(profile_labels)<-0:9
profile_colours <- c("0" = "#9DDFFE", "1"="#1A83B6", "2"="#98d3a4", "3"="#fbe8a5", "4"="#ecb16f", "5"="#f17c1b", "6"="#fdc0c0", "7"="#e88f8f", "8"="#b92020", "9"="#59124b")rules_MIC_distribution <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
ggplot(aes(x = as.factor(round(as.numeric(Measurement), digits=2)), fill = as.factor(rules_category))) + geom_bar() +
labs(x = "MIC (mg/L)", y = "Number of genomes",
linetype = "MIC breakpoints", title="A) MIC, by genotype profile", fill="Genotype profile") +
scale_fill_manual(values=profile_colours, labels=profile_labels)+
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
geom_vline(aes(xintercept = 5.5), color = "black") +
annotate("text", x=5.75, y=2900, label="R", size=2.5)+
geom_vline(aes(xintercept = 4.5), color = "black") +
annotate("text", x=4.25, y=2900, label="S", size=2.5)+
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=11))
rules_MIC_distribution# DD distribution
rules_DD_distribution <- cipro_antibiogram %>%
group_by(Laboratory.Typing.Method) %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
ggplot(aes(as.numeric(Measurement), fill = as.factor(rules_category))) + geom_bar() +
scale_fill_manual(values=profile_colours, labels=profile_labels)+
geom_vline(aes(xintercept = 21.5), color = "black") +
annotate("text", x=20.5, y=400, label="R", size=2.5)+
geom_vline(aes(xintercept = 24.5), color = "black", linetype=2) +
geom_vline(aes(xintercept = 25.5), color = "black", linetype=2) +
annotate("text", x=26.5, y=400, label="S", size=2.5)+
theme_minimal()+
labs(y = "Number of genomes", x="Disk zone (mm)", title="B) Disk zones, by genotype profile") +
theme_minimal() +
theme(axis.text.x = element_text(size=11))+
theme(legend.position = "none")rules_category_SIR_plot <- cipro_antibiogram %>%
ggplot(aes(x=as.factor(rules_category), fill=factor(Resistance.phenotype, levels = c("S", "I", "R")))) +
geom_bar(position = "fill") + coord_flip() +
scale_y_continuous(labels = scales::percent)+
labs(title="C) Observed phenotype distribution, per genotype profile", x="Genotype profile", y="Proportion (%)", fill="Phenotype") +
scale_fill_manual(values = res_colours)+
scale_x_discrete(limits=rev, labels=profile_labels) +
geom_text_repel(aes(label = after_stat(count)), stat = "count", size=3, direction="x", position=position_fill(0.4)) +
theme_minimal()
rules_category_SIR_plotdd_atu <- cipro_antibiogram %>% filter(Resistance.phenotype!="R" & predSIR=="R") %>% filter(Laboratory.Typing.Method == "Disk diffusion") %>% filter(as.numeric(Measurement) >21 & as.numeric(Measurement)<24)
mic_atu <- cipro_antibiogram %>% filter(Resistance.phenotype!="R" & predSIR=="R") %>% filter(Laboratory.Typing.Method != "Disk diffusion") %>% filter(as.numeric(Measurement) ==0.5)
atu <- bind_rows(mic_atu, mic_atu)
atu %>% group_by(Flq_acquired, Flq_mutations) %>% count() %>% mutate(pc=n/nrow(atu))## [1] "KASPAH" "KLEBGAP" "India_GHRU"
## [4] "Leangapichart_2021" "Philippines" "Egli"
## [7] "Long" "Mathers" "Italy_MedVetKlebs"
## [10] "BSAC" "Milan_2019" "Colombia_GHRU"
## [13] "BARNARDS" "SPARK" "Nigeria_GHRU"
## [16] "Stoesser_2013" "Heinz_2019" "Bachman_2022"
## [1] "Klebsiella pneumoniae"
## [2] "Klebsiella quasipneumoniae subsp. quasipneumoniae"
## [3] "Klebsiella quasipneumoniae subsp. similipneumoniae"
# number R with no res determinants
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow()## [1] 140
## [1] 6177
# assay measures for unexplained R
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(Laboratory.Typing.Method!="Disk diffusion" & Measurement==1) %>% nrow()## [1] 53
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(Laboratory.Typing.Method=="Disk diffusion" & Measurement %in% c(20, 21)) %>% nrow()## [1] 20
# STs for unexplained R
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% group_by(ST) %>% count()cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% group_by(ST) %>% count() %>% nrow()## [1] 84
# porin mutations in all genomes without QRDR/PMQR/aac6
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0& aac6==0 & Omp_mutations !="-") %>% nrow()## [1] 94
## [1] 5671
(cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0& aac6==0 & Omp_mutations !="-") %>% nrow()) / (cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow())## [1] 0.01657556
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% group_by(Omp_mutations) %>% count()cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(grepl('OmpK35', Omp_mutations))%>% nrow()## [1] 45
cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% filter(grepl('OmpK36', Omp_mutations))%>% nrow()## [1] 53
# genomes without QRDR/PMQR/aac6
noResDet <- cipro_antibiogram %>% filter(QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% mutate(ompK35=if_else(grepl('OmpK35', Omp_mutations), 1, 0)) %>% mutate(ompK36=if_else(grepl('OmpK36', Omp_mutations), 1, 0))
fisher.test(noResDet$ompK35, noResDet$resistant)##
## Fisher's Exact Test for Count Data
##
## data: noResDet$ompK35 and noResDet$resistant
## p-value = 6.934e-09
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 6.15008 28.58310
## sample estimates:
## odds ratio
## 13.761
##
## 2-sample test for equality of proportions with continuity correction
##
## data: table(noResDet$ompK35, noResDet$resistant)[2:1, 2:1]
## X-squared = 82.013, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## 0.08469031 0.35834006
## sample estimates:
## prop 1 prop 2
## 0.24444444 0.02292926
##
## Fisher's Exact Test for Count Data
##
## data: noResDet$ompK36 and noResDet$resistant
## p-value = 0.04102
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.8500308 9.1624131
## sample estimates:
## odds ratio
## 3.289251
##
## 2-sample test for equality of proportions with continuity correction
##
## data: table(noResDet$ompK36, noResDet$resistant)[2:1, 2:1]
## X-squared = 3.7993, df = 1, p-value = 0.05127
## alternative hypothesis: two.sided
## 95 percent confidence interval:
## -0.02948782 0.13201541
## sample estimates:
## prop 1 prop 2
## 0.0754717 0.0242079
# porin mutations in unexplained R
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% filter(Omp_mutations!="-") %>% nrow()## [1] 14
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% filter(Omp_mutations!="-") %>% nrow()/ (cipro_antibiogram %>% filter(Resistance.phenotype=="R" & QRDR_mutations==0 & acquired_genes==0 & aac6==0) %>% nrow())## [1] 0.1
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% group_by(Omp_mutations) %>% count()# assay measures for unexplained R with porin mutations
cipro_antibiogram %>% filter(Resistance.phenotype=="R" & Flq_acquired=="-" & Flq_mutations=="-" & aac6==0) %>% group_by(Omp_mutations, Measurement, Measurement.units) %>% count() %>% filter(Omp_mutations!="-") %>% arrange(Measurement)# number S with mutations
S_mut <- cipro_antibiogram %>% filter(Resistance.phenotype=="S" & ((QRDR_mutations==1 & Flq_mutations!="GyrA-87G"& Flq_mutations!="GyrA-87H") | (acquired_genes==1 & !grepl('qnrB1.', Flq_acquired))))
S_mut %>% nrow()## [1] 41
# assay measures for unexplained R
S_mut %>% filter(Laboratory.Typing.Method!="Disk diffusion" & Measurement==0.25) %>% nrow()## [1] 30
S_mut %>% filter(Laboratory.Typing.Method=="Disk diffusion" & Measurement %in% c(25, 26)) %>% nrow()## [1] 4
## [1] 33
# assay measures for unexplained R with porin mutations
S_mut %>% group_by(Flq_acquired, Flq_mutations, Measurement, Measurement.units) %>% count() %>% arrange(Measurement)MIC_vs_dets <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method!="Disk diffusion") %>%
select(`GyrA-83F`:`qnrVC6`, aac6, Measurement) %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches
# exclude genomes with rare variants
MIC_vs_dets <- MIC_vs_dets[,colSums(MIC_vs_dets) >= 20]
# model MIC vs all common variants (N>=20) = Model 1a
model_MIC_indiv <- lm(log2(Measurement) ~ ., data=MIC_vs_dets)
# model R vs all common variants (N>=20), each with aac6 interaction = Model 1b
model_MIC_indiv_aac6Interaction <- lm(log2(Measurement) ~ .*`aac(6')-Ib-cr`, data=MIC_vs_dets)# summarise model output
linreg_MIC_model1a <- linreg_details(model_MIC_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
mutate(Determinant=gsub("`","",Determinant))
linreg_MIC_model1b <- linreg_details(model_MIC_indiv_aac6Interaction) %>%
rename(Term=Determinant) %>%
separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>%
mutate(model="b") %>%
mutate(Determinant=gsub("`","",Determinant)) %>%
mutate(Term=gsub("`","",Term)) %>%
mutate(interaction=gsub("`","",interaction))
# all individual determinants, plus significant positive interactions
lin_reg_MIC_plot <- linreg_MIC_model1a %>% bind_rows(linreg_MIC_model1b) %>%
filter((pval<0.05 & est>0) | is.na(interaction)) %>%
mutate(interaction=factor(replace(interaction, is.na(interaction), "none"), levels=c("none", "aac(6')-Ib-cr"))) %>%
filter(Determinant != "(Intercept)") %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
geom_point(aes(x=est, group=model, col=model), position=pd) +
scale_colour_manual(values=c(a="grey", b="black")) +
theme_bw() + labs(x="Regression coefficient", col="Model", linetype="Interaction")
lin_reg_MIC_plotDD_vs_dets <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method=="Disk diffusion") %>%
select(`GyrA-83F`:`qnrVC6`, aac6, Measurement) %>%
rename(`aac(6')-Ib-cr`=aac6) %>%
select(-c(ends_with('?'), ends_with('*'))) # remove imprecise matches
# exclude genomes with rare variants
DD_vs_dets <- DD_vs_dets[,colSums(DD_vs_dets) >= 20]
# model MIC vs all common variants (N>=20) = Model 1a
model_DD_indiv <- lm(Measurement ~ ., data=DD_vs_dets)
# model R vs all common variants (N>=20), each with aac6 interaction = Model 1b
model_DD_indiv_aac6Interaction <- lm(Measurement ~ .*`aac(6')-Ib-cr`, data=DD_vs_dets)# summarise model output
linreg_DD_model1a <- linreg_details(model_DD_indiv) %>% mutate(interaction=NA) %>% mutate(model="a") %>% relocate(interaction, .after=Determinant) %>%
mutate(Determinant=gsub("`","",Determinant))
linreg_DD_model1b <- linreg_details(model_DD_indiv_aac6Interaction) %>%
rename(Term=Determinant) %>%
separate_wider_delim(Term, ":", names=c("Determinant","interaction"), too_few="align_start", cols_remove=F) %>%
mutate(model="b") %>%
mutate(Determinant=gsub("`","",Determinant)) %>%
mutate(Term=gsub("`","",Term)) %>%
mutate(interaction=gsub("`","",interaction))
# all individual determinants, plus significant positive interactions
lin_reg_DD_plot <- linreg_DD_model1a %>% bind_rows(linreg_DD_model1b) %>%
filter((pval<0.05 & est<0) | is.na(interaction)) %>%
mutate(interaction=replace(interaction, is.na(interaction), "-")) %>%
filter(Determinant != "(Intercept)") %>%
ggplot(aes(y=Determinant)) +
geom_vline(xintercept=0) +
geom_linerange(aes(xmin=ci.lower, xmax=ci.upper, group=model, col=model, linetype=interaction), position=pd) +
geom_point(aes(x=est, group=model, col=model), position=pd) +
scale_colour_manual(values=c(a="grey", b="black")) +
theme_bw() + labs(x="Regression coefficient", col="Model", linetype="Interaction") +
theme(legend.position="none")
lin_reg_DD_plotlin_reg_MIC_plot + ggtitle("MIC model") + lin_reg_DD_plot + ggtitle("Disk diffusion zone model") + plot_layout(axes="collect", guides="collect")ggsave(width=8, height=5, file="figs/FigX_regressionMIC_DD_Models.pdf")
ggsave(width=8, height=5, file="figs/FigX_regressionMIC_DD_Models.png")cipro_antibiogram %>% filter(Laboratory.Typing.Method!="Disk diffusion") %>% ggplot(aes(x=factor(rules_category),y=Measurement)) + geom_boxplot() + scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3))rules_MIC_distribution_separate <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
mutate(profiles=case_when(rules_category==0 ~ profile_labels["0"],
rules_category==1 ~ profile_labels["1"],
rules_category==2 ~ profile_labels["2"],
rules_category==3 ~ profile_labels["3"],
rules_category==4 ~ profile_labels["4"],
rules_category==5 ~ profile_labels["5"],
rules_category==6 ~ profile_labels["6"],
rules_category==7 ~ profile_labels["7"],
rules_category==8 ~ profile_labels["8"],
rules_category==9 ~ profile_labels["9"],
TRUE ~ NA
)) %>%
mutate(profiles=fct_relevel(profiles, profile_labels)) %>%
ggplot(aes(x = as.factor(round(as.numeric(Measurement), digits=2)),
fill = profiles)) +
geom_bar() +
labs(x = "MIC (mg/L)", y = "Number of genomes") +
scale_fill_manual(values=unname(profile_colours))+
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
geom_vline(aes(xintercept = 5.5), color = "black") +
geom_vline(aes(xintercept = 4.5), color = "black") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=8),
axis.text.y = element_text(size=7),
legend.position="none") +
facet_wrap(~profiles, scales="free_y", ncol=1)
rules_MIC_distribution_separatediscrete_quantile <- function(x, p) {
sort(x)[length(x)*p]
}
cipro_antibiogram <- cipro_antibiogram %>%
mutate(profiles=case_when(rules_category==0 ~ profile_labels["0"],
rules_category==1 ~ profile_labels["1"],
rules_category==2 ~ profile_labels["2"],
rules_category==3 ~ profile_labels["3"],
rules_category==4 ~ profile_labels["4"],
rules_category==5 ~ profile_labels["5"],
rules_category==6 ~ profile_labels["6"],
rules_category==7 ~ profile_labels["7"],
rules_category==8 ~ profile_labels["8"],
rules_category==9 ~ profile_labels["9"],
TRUE ~ NA
)) %>%
mutate(profiles=fct_relevel(profiles, profile_labels))
MIC_pred <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
group_by(profiles) %>%
summarise(median=median(Measurement),
lower25=discrete_quantile(as.numeric(Measurement), p=0.25),
upper25=discrete_quantile(as.numeric(Measurement), p=0.75)) %>%
mutate(IQR=paste0(median, " [", lower25, "-", upper25, "]"))
MIC_predrules_MIC_distribution_separate <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method != "Disk diffusion") %>%
left_join(MIC_pred) %>%
mutate(y=case_when(rules_category==0 ~ 1000,
rules_category==1 ~ 50,
rules_category==2 ~ 25,
rules_category==3 ~ 15,
rules_category==4 ~ 4,
rules_category==5 ~ 500,
rules_category==6 ~ 100,
rules_category==7 ~ 200,
rules_category==8 ~ 10,
rules_category==9 ~ 1000,
TRUE ~ NA
)) %>%
ggplot(aes(x = as.factor(round(as.numeric(Measurement), digits=2)),
fill = profiles)) +
geom_bar() +
#geom_text(aes(label=IQR, y=y), x=as.factor("256"), check_overlap = T, size=3) +
labs(x = "MIC (mg/L)", y = "Number of genomes") +
scale_fill_manual(values=c("LightBlue", "LightBlue", "#78638a", rep("IndianRed", 7)))+
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
geom_vline(aes(xintercept = 5.5), color = "black") +
geom_vline(aes(xintercept = 4.5), color = "black") +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=8),
axis.text.y = element_text(size=7),
legend.position="none") +
facet_wrap(~profiles, scales="free_y", ncol=1)
rules_MIC_distribution_separaterules_DD_distribution_separate <- cipro_antibiogram %>%
filter(Laboratory.Typing.Method == "Disk diffusion") %>%
mutate(profiles=case_when(rules_category==0 ~ profile_labels["0"],
rules_category==1 ~ profile_labels["1"],
rules_category==2 ~ profile_labels["2"],
rules_category==3 ~ profile_labels["3"],
rules_category==4 ~ profile_labels["4"],
rules_category==5 ~ profile_labels["5"],
rules_category==6 ~ profile_labels["6"],
rules_category==7 ~ profile_labels["7"],
rules_category==8 ~ profile_labels["8"],
rules_category==9 ~ profile_labels["9"],
TRUE ~ NA
)) %>%
mutate(profiles=fct_relevel(profiles, profile_labels)) %>%
ggplot(aes(x = as.numeric(Measurement), fill = profiles)) +
geom_bar() +
labs(y = "Number of genomes",
x="Disk zone (mm)",
title="B) Disk zones, by genotype profile") +
scale_fill_manual(values=unname(profile_colours))+
# round off x-axis labels to 2dp
scale_x_discrete(labels = function(x) round(as.numeric(x), digits = 2)) +
geom_vline(aes(xintercept = 21.5), color = "black") +
geom_vline(aes(xintercept = 24.5), color = "black", linetype=2) +
geom_vline(aes(xintercept = 25.5), color = "black", linetype=2) +
theme_minimal()+
theme(axis.text.x = element_text(angle = 45, hjust=1, size=10),
legend.position="none") +
facet_wrap(~profiles, scales="free_y", ncol=2)
rules_DD_distribution_separate